In [1]:
import os
import sys
import pandas as pd
import plotly.express as px
In [8]:
# NOTE(review): `os` is already imported in the first cell; this re-import is
# harmless but redundant.
import os
# Move two levels up so the relative "data/..." paths used below resolve.
# Assumes the notebook lives two directories below the project root — and note
# this cell is NOT idempotent: re-running it climbs two more levels.
os.chdir("../..")
In [10]:
os.getcwd()
Out[10]:
'/Volumes/GoogleDrive/My Drive/Computer Backups/Rahul Yerrabelli drive/PythonProjects/GeospatialAnalysis'
In [11]:
import geopandas
import rioxarray                 # Surface data manipulation
import xarray                    # Surface data manipulation
from pysal.explore import esda   # Exploratory Spatial analytics
from pysal.lib import weights    # Spatial weights
import contextily                # Background tiles
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
/Users/ryerrabelli/.conda/envs/GeospatialAnalysis/lib/python3.8/site-packages/spaghetti/network.py:36: FutureWarning: The next major release of pysal/spaghetti (2.0.0) will drop support for all ``libpysal.cg`` geometries. This change is a first step in refactoring ``spaghetti`` that is expected to result in dramatically reduced runtimes for network instantiation and operations. Users currently requiring network and point pattern input as ``libpysal.cg`` geometries should prepare for this simply by converting to ``shapely`` geometries.
  warnings.warn(f"{dep_msg}", FutureWarning)
In [155]:
# FIPS-to-county crosswalk (TSV; lines starting with '#' are comments).
# Everything is read as str so FIPS codes keep their leading zeros.
fips2county = pd.read_csv("data/fips2county.tsv", sep="\t", comment='#', dtype=str).sort_values(by="CountyFIPS")
# County boundary polygons; the "id" property holds the 5-digit county FIPS.
fips2countygeo = geopandas.read_file("data/plotly_usa_geojson-counties-fips.json").sort_values(by="id")
# Provenance: local file above is a copy of the plotly counties GeoJSON:
#fips2countygeo = geopandas.read_file("https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json")
In [177]:
# Fetch the plotly county-boundary GeoJSON twice: once as a GeoDataFrame for
# attribute joins, and once as a raw dict for px.choropleth's `geojson=` input.
countiesgeo = geopandas.read_file(
    "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
).sort_values(by="id")

from urllib.request import urlopen
import json

with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

# Reorder the GeoJSON features by FIPS ("id") so they line up with the sorted
# dataframes.  `counties2` is kept as a module-level name (used by a later cell).
counties2 = dict(counties)
counties2["features"] = sorted(counties["features"], key=lambda feat: feat["id"])
counties = counties2
In [162]:
import pandas as pd

# The ent CSV file only contains the counties which are analyzable; FIPS stays
# a string so leading zeros survive.
df_orig = (
    pd.read_csv("data/2022_04_10 ent initial output.csv", dtype={"FIPS": str})
    .sort_values(by="FIPS")
)
# Attach the standard county/state identifiers from the fips2county crosswalk.
df_wide = df_orig.merge(fips2county, how="left", left_on='FIPS', right_on='CountyFIPS')
# Convenience "County, ST" label (e.g. "Freehold, NJ" or "Chicago, IL") near the front.
df_wide.insert(1, "County_St", df_wide["CountyName"].astype(str) + ", " + df_wide["StateAbbr"].astype(str))
# Preview a few rows with every column visible.
with pd.option_context('display.max_rows', 3, 'display.max_columns', None):
    display(df_wide)

# Slim projection used for the spatial-statistics cells below.
loc_simple = ["FIPS", "CountyName","StateAbbr", "% ASC Billing", "Moran I score for ACS billing fraction"]
df_wide_simple = df_wide.loc[:, loc_simple]

# Broader column list kept for reference.
loc_main = ["FIPS", "County",	"StateFIPS", "Total Medicare Payment Amount", "% ASC Procedures", "% ASC Billing",	"CountyFIPS_3",	"CountyName",	"StateName",	"CountyFIPS",	"StateAbbr",	"STATE_COUNTY"]
FIPS County_St Total Number of Services Total Medicare Payment Amount Total Number of Services: 2019 Total Medicare Payment Amount: 2019 Total Number of Services: 2018 Total Medicare Payment Amount: 2018 Total Number of Services: 2017 Total Medicare Payment Amount: 2017 Total Number of Services: 2016 Total Medicare Payment Amount: 2016 Total Number of Services: 2015 Total Medicare Payment Amount: 2015 tot_ratio % ASC Procedures: 2019 % ASC Billing: 2019 % ASC Procedures: 2018 % ASC Billing: 2018 % ASC Procedures: 2017 % ASC Billing: 2017 % ASC Procedures: 2016 % ASC Billing: 2016 % ASC Procedures: 2015 % ASC Billing: 2015 % ASC Procedures % ASC Billing Beneficiaries with Part A and Part B Average Age Percent Male Percent Non-Hispanic White Percent African American Percent Hispanic Percent Eligible for Medicaid Average HCC Score Hospital Readmission Rate Emergency Department Visits per 1000 Beneficiaries Procedures Per Capita Standardized Costs Procedure Events Per 1000 Beneficiaries metro pct_poverty median_house_income pop 2013_Rural_urban_cont_code Pct_wthout_high_diploma Pct_wth_high_diploma Pct_wth_some_coll Pct_wth_coll_degree unemployment pct_uninsured fibro tabacco obesity migrane Alzheimers Depression Alcohol Abuse Drug Abuse Schizo_othr_psych COPD Chronic Kidney Disease Osteoporosis Stroke Diabetes Asthma Arthritis Hypertension Heart Failure Ischemic Heart Disease Population Density Medicare Population Density Moran I score for ACS billing fraction County StateFIPS CountyFIPS_3 CountyName StateName CountyFIPS StateAbbr STATE_COUNTY
0 01003 Baldwin, AL 524.0 29853.60 50.0 4107.68 238.0 12862.19 236.0 12883.73 0.0 0.00 0.0 0.00 15.0 0.00000 0.000000 0.0 0.0 0.000000 0.000000 0.0 0.0 0.0 0.0 0.000000 0.000000 47450.4 72.2 46.114 92.722 4.384 1.06 8.484 0.920 15.616 565.6 811.670 7523.2 1 10.92 56582.6 212830.0 3.0 9.2 27.7 31.3 31.9 4.32 11.52 19.8 8.4 20.8 3.0 10.82 15.90 1.775 3.150 2.14 11.18 20.40 6.20 3.86 23.58 4.76 37.42 60.28 12.38 32.04 133.873533 29.847074 Low-High Baldwin 01 003 Baldwin Alabama 01003 AL AL | BALDWIN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
940 56041 Uinta, WY 347.0 92886.86 179.0 65588.66 34.0 5826.04 47.0 9214.44 51.0 9390.43 36.0 2867.29 17.0 43.01676 72.010649 0.0 0.0 25.531915 72.933569 0.0 0.0 0.0 0.0 25.648415 58.082747 2939.2 70.0 48.514 0.000 0.000 0.00 14.766 0.818 12.368 579.0 529.996 3911.8 0 9.82 65848.4 20478.8 7.0 7.3 41.5 35.2 16.0 4.74 12.90 16.0 9.4 11.4 2.2 7.08 16.22 2.200 2.675 3.26 11.08 15.58 2.96 2.22 23.26 3.72 24.48 39.52 11.28 22.38 9.839597 1.412219 Non Significant Uinta 56 041 Uinta Wyoming 56041 WY WY | UINTA

941 rows × 80 columns

In [ ]:
 
In [13]:
cols_to_keep = ["FIPS","County_St"]
col_categories = ["Total Number of Services:", "Total Medicare Payment Amount:", "% ASC Procedures:", "% ASC Billing:"]

years = range(2015, 2019 + 1)

# Reshape each measurement family from one-column-per-year (wide) to one
# row-per-year (long), one intermediate frame per family.
df_longs = []
for col_category in col_categories:
    # Maps e.g. "% ASC Billing: 2017" -> "2017"; keys double as value_vars.
    year_labels = {f"{col_category} {year}": f"{year}" for year in years}
    melted = df_wide.melt(
        id_vars=cols_to_keep,
        var_name="Year",
        value_vars=list(year_labels),
        value_name=f"{col_category} in Year",
    )
    melted["Year"] = melted["Year"].replace(year_labels)
    df_longs.append(melted)

# Combine the per-family long frames on FIPS / County_St / Year.
df_long = df_longs[0]
for extra in df_longs[1:]:
    df_long = df_long.merge(extra, how="outer", on=cols_to_keep + ["Year"])

# Re-attach the non-yearly columns from the wide frame (the per-year columns
# were already melted, so drop them before merging back).
per_year_cols = [f"{col_category} {year}" for year in years for col_category in col_categories]
df_long = df_long.merge(df_wide.drop(per_year_cols, axis=1), how="left", on=cols_to_keep)

display(df_long)
FIPS County_St Year Total Number of Services: in Year Total Medicare Payment Amount: in Year % ASC Procedures: in Year % ASC Billing: in Year Total Number of Services Total Medicare Payment Amount tot_ratio ... Medicare Population Density Moran I score for ACS billing fraction County StateFIPS CountyFIPS_3 CountyName StateName CountyFIPS StateAbbr STATE_COUNTY
0 01017 Chambers, AL 2015 0.0 0.00 0.000000 0.000000 408.0 30064.800000 14.196990 ... 14.230610 Non Significant Chambers 01 017 Chambers Alabama 01017 AL AL | CHAMBERS
1 01033 Colbert, AL 2015 108.0 10404.39 0.000000 0.000000 272.0 37080.230000 16.000000 ... 22.681014 Non Significant Colbert 01 033 Colbert Alabama 01033 AL AL | COLBERT
2 01045 Dale, AL 2015 0.0 0.00 0.000000 0.000000 12.0 405.210000 0.999104 ... 17.700437 Non Significant Dale 01 045 Dale Alabama 01045 AL AL | DALE
3 01083 Limestone, AL 2015 0.0 0.00 0.000000 0.000000 55.0 9515.590000 4.000000 ... 29.157261 Non Significant Limestone 01 083 Limestone Alabama 01083 AL AL | LIMESTONE
4 05145 White, AR 2015 1217.0 48412.57 0.000000 0.000000 1269.0 52190.220000 11.995594 ... 15.224018 Non Significant White 05 145 White Arkansas 05145 AR AR | WHITE
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4700 21073 Franklin, KY 2019 0.0 0.00 0.000000 0.000000 114.0 7749.960000 3.910144 ... 62.858188 Non Significant Franklin 21 073 Franklin Kentucky 21073 KY KY | FRANKLIN
4701 56021 Laramie, WY 2019 422.0 79083.86 100.000000 100.000000 1784.0 337949.890001 21.000000 ... 6.286729 Non Significant Laramie 56 021 Laramie Wyoming 56021 WY WY | LARAMIE
4702 54041 Lewis, WV 2019 0.0 0.00 0.000000 0.000000 606.0 26648.230000 4.000000 ... 10.524948 Low-Low Lewis 54 041 Lewis West Virginia 54041 WV WV | LEWIS
4703 50027 Windsor, VT 2019 319.0 12093.61 0.000000 0.000000 1132.0 47825.130000 35.000000 ... 14.320922 Low-Low Windsor 50 027 Windsor Vermont 50027 VT VT | WINDSOR
4704 51041 Chesterfield, VA 2019 111.0 47135.49 28.828829 51.859904 617.0 204885.309999 34.339788 ... 124.679835 Non Significant Chesterfield 51 041 Chesterfield Virginia 51041 VA VA | CHESTERFIELD

4705 rows × 65 columns

In [163]:
# Attach county geometries to both wide tables.  A right merge keeps every
# analysis row even when a FIPS code is absent from the GeoJSON.
geom_merge_kwargs = dict(how="right", left_on='id', right_on='FIPS')
df_wide_geom = pd.merge(countiesgeo, df_wide, **geom_merge_kwargs)
df_wide_simple_geom = pd.merge(countiesgeo, df_wide_simple, **geom_merge_kwargs)
In [164]:
# Index both geo frames by FIPS so px.choropleth can use the index as `locations`.
# Guarded to make the cell idempotent: on a re-run "FIPS" has already become the
# index (it is no longer a column) and the unguarded set_index would raise KeyError.
if "FIPS" in df_wide_geom.columns:
    df_wide_geom = df_wide_geom.set_index("FIPS").sort_index()
if "FIPS" in df_wide_simple_geom.columns:
    df_wide_simple_geom = df_wide_simple_geom.set_index("FIPS").sort_index()
In [179]:
# Choropleth of 2019 % ASC procedures by county, using the raw GeoJSON dict
# (counties2) matched against the FIPS index of df_wide_geom.
fig = px.choropleth(
    df_wide_geom,
    geojson=counties2,
    locations=df_wide_geom.index,
    color='% ASC Procedures: 2019',
    color_continuous_scale="Viridis",
    scope="usa",
    labels={
        "2013_Rural_urban_cont_code": "2013-RUCA",
        "pop": "Pop.",
        "Average Age": "Mean Age",
        "Percent Male": "% M",
        "tot_ratio": "Tot. Ratio",
    },
)

# Hover styling, horizontal legend above the plot, tight margins, and a
# centered title — combined into one update_layout call (plotly merges
# successive layout updates, so one call is equivalent to several).
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell",
        align="auto",
    ),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    title={
        'text': "% ASC Procedures 2019",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)
fig.show()
In [186]:
# Choropleth of overall % ASC Billing, this time feeding the GeoDataFrame's
# own geometry column to plotly instead of a separate GeoJSON dict.
fig = px.choropleth(
    df_wide_simple_geom,
    geojson=df_wide_simple_geom.geometry,
    locations=df_wide_simple_geom.index,
    color='% ASC Billing',
    color_continuous_scale="Viridis",
    scope="usa",
    labels={
        "2013_Rural_urban_cont_code": "2013-RUCA",
        "pop": "Pop.",
        "Average Age": "Mean Age",
        "Percent Male": "% M",
        "tot_ratio": "Tot. Ratio",
    },
)

# Hover styling, horizontal legend, tight margins, centered title — a single
# update_layout call is equivalent to several (plotly merges layout updates).
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell",
        align="auto",
    ),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    title={
        'text': "% ASC Billing",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)
fig.show()
In [192]:
# Spatial weights: each county's neighborhood is its k=8 nearest counties
# (pysal KNN weights built from the frame's geometries).
w = weights.KNN.from_dataframe(df_wide_simple_geom, k=8)
# Row-standardization: each row of weights sums to 1, so the spatial lag
# below is a neighborhood average rather than a sum.
w.transform = 'R'
In [194]:
db = df_wide_simple_geom.copy()
In [195]:
# Spatial lag of '% ASC Billing': for each county, the weighted average of the
# variable over its 8 nearest neighbors (w is row-standardized above).
db['% ASC Billing_lag'] = weights.spatial_lag.lag_spatial(
    w, db['% ASC Billing']
)
In [196]:
# Map the spatially lagged % ASC Billing, sourcing the polygons from the
# GeoDataFrame's own geometry column.
fig = px.choropleth(
    db,
    geojson=db.geometry,
    locations=db.index,
    color='% ASC Billing_lag',
    color_continuous_scale="Viridis",
    scope="usa",
)
fig.show()
In [198]:
# NOTE(review): w.transform was already set to 'R' a few cells up; re-setting
# it here is a harmless no-op that keeps this cell self-sufficient.
w.transform = 'R'
# Global Moran's I for '% ASC Billing' under the row-standardized KNN weights.
moran = esda.moran.Moran(db['% ASC Billing'], w)
In [201]:
moran.I, moran.p_sim
Out[201]:
(0.12196058782012296, 0.001)
In [202]:
lisa = esda.moran.Moran_Local(db['% ASC Billing'], w)
In [204]:
import seaborn
# Distribution of the local Moran's I values: KDE curve...
ax = seaborn.kdeplot(lisa.Is)
# ...plus one small bar (rug) per county along the horizontal axis.
# Trailing ';' suppresses the Axes repr in the cell output.
seaborn.rugplot(lisa.Is, ax=ax);
In [207]:
# Four-panel LISA summary figure: local statistics, quadrant categories,
# significance map, and the final cluster map.
from splot import esda as esdaplot
import matplotlib.pyplot as plt  # Graphics
from matplotlib import colors  # NOTE(review): imported but unused in this cell

# Set up a 2x2 figure
f, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
# Make the axes accessible with single indexing
axs = axs.flatten()

                    # Subplot 1 #
            # Choropleth of local statistics
# Grab first axis in the figure
ax = axs[0]
# Assign new column with local statistics on-the-fly
db.assign(
    Is=lisa.Is
# Plot choropleth of local statistics (quantile scheme, 5 classes)
).plot(
    column='Is', 
    cmap='plasma', 
    scheme='quantiles',
    k=5, 
    edgecolor='white', 
    linewidth=0.1, 
    alpha=0.75,
    legend=True,
    ax=ax
)

                    # Subplot 2 #
                # Quadrant categories
# Grab second axis
ax = axs[1]
# Plot quadrant colors (note to ensure all polygons are assigned a
# quadrant, we "trick" the function by setting significance level to
# 1 so all observations are treated as "significant" and thus assigned
# a quadrant color)
esdaplot.lisa_cluster(lisa, db, p=1, ax=ax);

                    # Subplot 3 #
                # Significance map
# Grab third axis
ax = axs[2]
# 
# Find out significant observations
labels = pd.Series(
    1 * (lisa.p_sim < 0.05), # Assign 1 if significant, 0 otherwise
    index=db.index           # Use the index in the original data
# Recode 1 to "Significant" and 0 to "Non-Significant"
).map({1: 'Significant', 0: 'Non-Significant'})
# Assign labels to `db` on the fly
db.assign(
    cl=labels
# Plot choropleth of (non-)significant areas
).plot(
    column='cl', 
    categorical=True,
    k=2,
    cmap='Paired',
    linewidth=0.1,
    edgecolor='white',
    legend=True,
    ax=ax
)

                       
                    # Subplot 4 #
                    # Cluster map
# Grab fourth axis
ax = axs[3]
# Plot quadrant colors; in this case, we use a 5% significance
# level to select polygons as part of statistically significant
# clusters
esdaplot.lisa_cluster(lisa, db, p=0.05, ax=ax);

                    # Figure styling #
# Set title to each subplot
# NOTE(review): axs was already flattened above, so this second .flatten()
# is a harmless no-op on the 1-D array.
for i, ax in enumerate(axs.flatten()):
    ax.set_axis_off()
    ax.set_title(
        [
            'Local Statistics', 
            'Scatterplot Quadrant', 
            'Statistical Significance', 
            'Moran Cluster Map'
        ][i], y=0
    )
# Tight layout to minimise in-between white space
f.tight_layout()

# Display the figure
plt.show()
In [ ]: